<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Gemini API Image Bounding Box Visualization</title>
    <script type="module">
        import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai";
        import { marked } from "https://esm.run/marked";

        /**
         * Returns the Gemini API key, asking the user for it on first use.
         *
         * The key is read from localStorage under "GEMINI_API_KEY". When nothing is
         * stored the user is prompted, and a non-blank answer is saved for next time.
         *
         * @returns {string|null} The key; null when the prompt is cancelled, or an
         *     empty string when the user submits a blank value.
         */
        function getApiKey() {
            let apiKey = localStorage.getItem("GEMINI_API_KEY");
            if (!apiKey) {
                const entered = prompt("Please enter your Gemini API key:");
                // Pasted keys often carry stray whitespace or a trailing newline;
                // storing that verbatim would make every later request fail.
                apiKey = typeof entered === "string" ? entered.trim() : entered;
                if (apiKey) {
                    localStorage.setItem("GEMINI_API_KEY", apiKey);
                }
            }
            return apiKey;
        }

        /**
         * Creates a Gemini model handle using the stored (or freshly prompted) API key.
         *
         * @param {object} params - Passed straight to GoogleGenerativeAI#getGenerativeModel
         *     (the caller supplies `{ model: <model id> }`).
         * @returns {Promise<object>} The generative model instance.
         * @throws {Error} When no API key is available, so the caller can show a clear
         *     message instead of a failed network request.
         */
        async function getGenerativeModel(params) {
            const apiKey = getApiKey();
            if (!apiKey) {
                throw new Error("A Gemini API key is required to process the request.");
            }
            const genAI = new GoogleGenerativeAI(apiKey);
            return genAI.getGenerativeModel(params);
        }

        /**
         * Reads a file and wraps its base64 payload in the `inlineData` part shape
         * that is pushed into the generateContent request.
         *
         * @param {File} file - File to encode; its `type` becomes the part's mimeType.
         * @returns {Promise<{inlineData: {data: string, mimeType: string}}>} Resolves with
         *     the base64 data (the portion of the data URL after the first comma).
         *     Rejects when the file cannot be read.
         */
        async function fileToGenerativePart(file) {
            return new Promise((resolve, reject) => {
                const reader = new FileReader();
                // Use onload rather than onloadend: loadend also fires after a failed
                // read, where reader.result is null and the promise would never settle.
                reader.onload = () => resolve({
                    inlineData: {
                        data: reader.result.split(",")[1],
                        mimeType: file.type
                    }
                });
                const fail = () => reject(reader.error || new Error("Failed to read the image file."));
                reader.onerror = fail;
                reader.onabort = fail;
                reader.readAsDataURL(file);
            });
        }

        /**
         * Downscales an image to at most 1000px wide (keeping its aspect ratio) and
         * re-encodes it as a JPEG at quality 0.7.
         *
         * @param {File} file - Image file chosen by the user.
         * @returns {Promise<File>} Resolves with a File named "compressed_image.jpg" of
         *     type "image/jpeg". Rejects when the file cannot be read, cannot be
         *     decoded as an image, or cannot be encoded.
         */
        function resizeAndCompressImage(file) {
            const MAX_WIDTH = 1000;
            const JPEG_QUALITY = 0.7;

            return new Promise((resolve, reject) => {
                const reader = new FileReader();
                reader.onerror = () => reject(reader.error || new Error('Failed to read the image file.'));
                reader.onload = function(event) {
                    const img = new Image();
                    // Without this handler an undecodable file would leave the promise pending forever.
                    img.onerror = () => reject(new Error('The selected file could not be decoded as an image.'));
                    img.onload = function() {
                        const canvas = document.createElement('canvas');
                        const ctx = canvas.getContext('2d');

                        let width = img.width;
                        let height = img.height;

                        // Only shrink; images already narrower than the limit keep their size.
                        if (width > MAX_WIDTH) {
                            height = Math.round((height * MAX_WIDTH) / width);
                            width = MAX_WIDTH;
                        }

                        canvas.width = width;
                        canvas.height = height;

                        ctx.drawImage(img, 0, 0, width, height);

                        canvas.toBlob((blob) => {
                            // toBlob hands back null when the image cannot be created.
                            if (!blob) {
                                reject(new Error('Failed to encode the image as JPEG.'));
                                return;
                            }
                            resolve(new File([blob], "compressed_image.jpg", { type: "image/jpeg" }));
                        }, 'image/jpeg', JPEG_QUALITY);
                    };
                    img.src = event.target.result;
                };
                reader.readAsDataURL(file);
            });
        }

        /**
         * Click handler for the "Process" button.
         *
         * Sends the prompt (plus the selected image, if any, after resizing and
         * compression) to the chosen Gemini model, renders the markdown reply into
         * #result, and then visualises any points or bounding boxes found in the
         * reply on top of the selected image. Errors are shown in #result.
         *
         * @returns {Promise<void>}
         */
        async function processImageAndPrompt() {
            const fileInput = document.getElementById('imageInput');
            const promptInput = document.getElementById('promptInput');
            const resultDiv = document.getElementById('result');
            const modelSelect = document.getElementById('modelSelect');

            if (!promptInput.value) {
                alert('Please enter a prompt.');
                return;
            }

            // Snapshot the selection now: the user may pick another file while the
            // request is in flight, and the overlay must match the image that was sent.
            const imageFile = fileInput.files[0];

            resultDiv.textContent = 'Processing...';

            // Clear previous bounding box images
            document.getElementById('boundingBoxImages').innerHTML = '';

            // Clear the previous drawing too, so a reply without coordinates does not
            // leave the last request's overlay on screen.
            const canvas = document.getElementById('canvas');
            canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);

            try {
                const model = await getGenerativeModel({ model: modelSelect.value });
                let content = [promptInput.value];

                if (imageFile) {
                    const compressedImage = await resizeAndCompressImage(imageFile);
                    const imagePart = await fileToGenerativePart(compressedImage);
                    content.push(imagePart);
                }

                const result = await model.generateContent(content);
                const response = await result.response;
                const text = response.text();

                // NOTE(review): the model's reply is rendered as HTML via marked, which
                // presumably does not sanitize its output — confirm and add a sanitizer
                // if untrusted content can reach the prompt or image.
                resultDiv.innerHTML = marked.parse(text);

                if (imageFile) {
                    // Extract coordinates from the response
                    const coordinates = extractCoordinates(text);
                    const points = extractPoints(text);

                    // Point annotations take precedence over bounding boxes.
                    if (points.length > 0) {
                        displayImageWithPoints(imageFile, points);
                    } else if (coordinates.length > 0) {
                        displayImageWithBoundingBoxes(imageFile, coordinates);
                    }
                }
            } catch (error) {
                // textContent keeps markup inside an error message from being interpreted.
                resultDiv.textContent = `Error: ${error.message}`;
            }
        }

        /**
         * Finds every four-integer array literal (e.g. "[10, 20, 300, 400]") in the text.
         *
         * @param {string} text - Model response to scan.
         * @returns {number[][]} One four-number array per match, in order of
         *     appearance; empty when nothing matches.
         */
        function extractCoordinates(text) {
            const boxPattern = /\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]/g;
            // Convert the captured digit runs directly instead of handing the match to
            // JSON.parse, which throws on zero-padded values such as "012" that the
            // pattern accepts.
            return Array.from(text.matchAll(boxPattern), (match) => match.slice(1).map(Number));
        }

        /**
         * Extracts point annotations of the form
         * [{"point": [y, x], "label": "..."}, ...] from the text.
         *
         * The array must end at the end of the text, before a code fence, or before
         * a blank line. Items without a usable `point` (an array whose first two
         * entries are finite numbers) are dropped so the renderer never receives
         * a malformed entry.
         *
         * @param {string} text - Model response to scan.
         * @returns {Array<{point: number[], label?: string}>} Valid point items, or
         *     an empty array when none are found or the JSON cannot be parsed.
         */
        function extractPoints(text) {
            const isPointItem = (item) =>
                item !== null &&
                typeof item === 'object' &&
                Array.isArray(item.point) &&
                item.point.length >= 2 &&
                Number.isFinite(item.point[0]) &&
                Number.isFinite(item.point[1]);

            try {
                // Look for a JSON array containing an object whose first key is "point";
                // whitespace after the brace is allowed so pretty-printed output matches.
                const jsonMatch = text.match(/\[[\s\S]*?\{\s*"point"[\s\S]*?\](?=\s*$|\s*```|\s*\n\n)/);
                if (jsonMatch) {
                    const parsed = JSON.parse(jsonMatch[0]);
                    if (Array.isArray(parsed)) {
                        return parsed.filter(isPointItem);
                    }
                }
            } catch (e) {
                // The matched span was not valid JSON; treat it as "no points".
            }
            return [];
        }

        /**
         * Draws the image on #canvas with a 0-1000 reference grid, labelled axes and
         * one coloured rectangle per bounding box, and appends a titled crop of each
         * box to #boundingBoxImages.
         *
         * @param {File} file - Image to draw; read asynchronously, so drawing happens
         *     after this function returns.
         * @param {number[][]} coordinates - Boxes as [ymin, xmin, ymax, xmax]; each
         *     value is divided by 1000 and scaled by the image size.
         */
        function displayImageWithBoundingBoxes(file, coordinates) {
            const reader = new FileReader();
            reader.onload = function(event) {
                const image = new Image();
                image.onload = function() {
                    const canvas = document.getElementById('canvas');
                    // The canvas is 100px larger than the image in each dimension; the
                    // image is drawn at (80, 20), leaving room for the axis labels.
                    canvas.width = image.width + 100;
                    canvas.height = image.height + 100;
                    const ctx = canvas.getContext('2d');

                    // Draw the image
                    ctx.drawImage(image, 80, 20);

                    // Draw grid lines
                    ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';  // Red with 50% opacity
                    ctx.lineWidth = 1;

                    // Vertical grid lines
                    for (let i = 0; i <= 1000; i += 100) {
                        const x = 80 + i / 1000 * image.width;
                        ctx.beginPath();
                        ctx.moveTo(x, 20);
                        ctx.lineTo(x, image.height + 20);
                        ctx.stroke();
                    }

                    // Horizontal grid lines
                    for (let i = 0; i <= 1000; i += 100) {
                        const y = 20 + i / 1000 * image.height;
                        ctx.beginPath();
                        ctx.moveTo(80, y);
                        ctx.lineTo(image.width + 80, y);
                        ctx.stroke();
                    }

                    // Draw bounding boxes
                    const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
                    const boundingBoxImages = document.getElementById('boundingBoxImages');
                    boundingBoxImages.innerHTML = ''; // Clear previous bounding box images

                    coordinates.forEach((box, index) => {
                        const [y1, x1, y2, x2] = box.map(coord => coord / 1000);

                        // Order the corners so a box reported with min/max swapped still
                        // yields a non-negative size for the crop canvas.
                        const left = Math.min(x1, x2) * image.width;
                        const top = Math.min(y1, y2) * image.height;
                        const width = Math.abs(x2 - x1) * image.width;
                        const height = Math.abs(y2 - y1) * image.height;

                        ctx.strokeStyle = colors[index % colors.length];
                        ctx.lineWidth = 5;
                        ctx.strokeRect(left + 80, top + 20, width, height);

                        const boundingBoxContainer = document.createElement('div');
                        boundingBoxContainer.className = 'bounding-box-container';

                        const title = document.createElement('p');
                        title.textContent = `Bounding Box: [${box.join(', ')}]`;
                        boundingBoxContainer.appendChild(title);

                        // Extract bounding box image. A box with no area cannot be cropped
                        // (a zero-sized canvas yields an unusable data URI), so only the
                        // title is listed for it.
                        const cropWidth = Math.round(width);
                        const cropHeight = Math.round(height);
                        if (cropWidth >= 1 && cropHeight >= 1) {
                            const boundingBoxCanvas = document.createElement('canvas');
                            boundingBoxCanvas.width = cropWidth;
                            boundingBoxCanvas.height = cropHeight;
                            const bbCtx = boundingBoxCanvas.getContext('2d');
                            bbCtx.drawImage(image, left, top, width, height, 0, 0, cropWidth, cropHeight);

                            const img = document.createElement('img');
                            // Convert canvas to JPEG base64 data URI
                            img.src = boundingBoxCanvas.toDataURL('image/jpeg');
                            img.alt = `Cropped region for bounding box [${box.join(', ')}]`;
                            boundingBoxContainer.appendChild(img);
                        }

                        boundingBoxImages.appendChild(boundingBoxContainer);
                    });

                    // Draw axes and labels
                    ctx.strokeStyle = '#000000';
                    ctx.lineWidth = 1;
                    ctx.font = '26px Arial';
                    ctx.textAlign = 'right';

                    // Y-axis
                    ctx.beginPath();
                    ctx.moveTo(80, 20);
                    ctx.lineTo(80, image.height + 20);
                    ctx.stroke();

                    // Y-axis labels and ticks
                    for (let i = 0; i <= 1000; i += 100) {
                        const y = 20 + i / 1000 * image.height;
                        ctx.fillText(i.toString(), 75, y + 5);
                        ctx.beginPath();
                        ctx.moveTo(75, y);
                        ctx.lineTo(80, y);
                        ctx.stroke();
                    }

                    // X-axis
                    ctx.beginPath();
                    ctx.moveTo(80, image.height + 20);
                    ctx.lineTo(image.width + 80, image.height + 20);
                    ctx.stroke();

                    // X-axis labels and ticks
                    ctx.textAlign = 'center';
                    for (let i = 0; i <= 1000; i += 100) {
                        const x = 80 + i / 1000 * image.width;
                        ctx.fillText(i.toString(), x, image.height + 40);
                        ctx.beginPath();
                        ctx.moveTo(x, image.height + 20);
                        ctx.lineTo(x, image.height + 25);
                        ctx.stroke();
                    }
                };
                image.src = event.target.result;
            };
            reader.readAsDataURL(file);
        }

        /**
         * Draws the image on #canvas with a 0-1000 reference grid, labelled axes and
         * one labelled dot per point, and lists each point in #boundingBoxImages.
         *
         * @param {File} file - Image to draw; read asynchronously, so drawing happens
         *     after this function returns.
         * @param {Array<{point: number[], label?: string}>} points - Items whose
         *     `point` is [y, x]; each value is divided by 1000 and scaled by the image
         *     size. Items without a two-element `point` array are skipped.
         */
        function displayImageWithPoints(file, points) {
            const reader = new FileReader();
            reader.onload = function(event) {
                const image = new Image();
                image.onload = function() {
                    const canvas = document.getElementById('canvas');
                    // The canvas is 100px larger than the image in each dimension; the
                    // image is drawn at (80, 20), leaving room for the axis labels.
                    canvas.width = image.width + 100;
                    canvas.height = image.height + 100;
                    const ctx = canvas.getContext('2d');

                    // Draw the image
                    ctx.drawImage(image, 80, 20);

                    // Draw grid lines
                    ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';  // Red with 50% opacity
                    ctx.lineWidth = 1;

                    // Vertical grid lines
                    for (let i = 0; i <= 1000; i += 100) {
                        const x = 80 + i / 1000 * image.width;
                        ctx.beginPath();
                        ctx.moveTo(x, 20);
                        ctx.lineTo(x, image.height + 20);
                        ctx.stroke();
                    }

                    // Horizontal grid lines
                    for (let i = 0; i <= 1000; i += 100) {
                        const y = 20 + i / 1000 * image.height;
                        ctx.beginPath();
                        ctx.moveTo(80, y);
                        ctx.lineTo(image.width + 80, y);
                        ctx.stroke();
                    }

                    // Draw points
                    const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
                    const boundingBoxImages = document.getElementById('boundingBoxImages');
                    boundingBoxImages.innerHTML = ''; // Clear previous content

                    points.forEach((item, index) => {
                        // A malformed entry must not abort the remaining points or the axes.
                        if (!item || !Array.isArray(item.point) || item.point.length < 2) {
                            return;
                        }

                        const [y, x] = item.point.map(coord => coord / 1000);
                        const label = item.label || '';

                        // Calculate pixel position
                        const pixelX = x * image.width + 80;
                        const pixelY = y * image.height + 20;

                        const color = colors[index % colors.length];

                        // Draw point as a circle
                        ctx.fillStyle = color;
                        ctx.strokeStyle = '#FFFFFF';
                        ctx.lineWidth = 2;
                        ctx.beginPath();
                        ctx.arc(pixelX, pixelY, 8, 0, 2 * Math.PI);
                        ctx.fill();
                        ctx.stroke();

                        // Draw label background
                        ctx.font = 'bold 16px Arial';
                        const textWidth = ctx.measureText(label).width;
                        const textHeight = 16;
                        const padding = 4;

                        // Position label to the right of the point
                        const labelX = pixelX + 12;
                        const labelY = pixelY - 8;

                        ctx.fillStyle = 'rgba(0, 0, 0, 0.7)';
                        ctx.fillRect(labelX, labelY, textWidth + padding * 2, textHeight + padding);

                        // Draw label text
                        ctx.fillStyle = '#FFFFFF';
                        ctx.textAlign = 'left';
                        ctx.textBaseline = 'top';
                        ctx.fillText(label, labelX + padding, labelY + padding);

                        // Add point info to the list below
                        const pointContainer = document.createElement('div');
                        pointContainer.className = 'bounding-box-container';

                        const title = document.createElement('p');
                        title.style.color = color;
                        title.textContent = `Point: [${item.point.join(', ')}] - Label: ${label}`;
                        pointContainer.appendChild(title);

                        boundingBoxImages.appendChild(pointContainer);
                    });

                    // Draw axes and labels
                    ctx.strokeStyle = '#000000';
                    ctx.fillStyle = '#000000';
                    ctx.lineWidth = 1;
                    ctx.font = '26px Arial';
                    ctx.textAlign = 'right';
                    // The point labels switch the baseline to 'top'; restore the default so
                    // the axis numbers line up with their ticks, as in the bounding-box view.
                    ctx.textBaseline = 'alphabetic';

                    // Y-axis
                    ctx.beginPath();
                    ctx.moveTo(80, 20);
                    ctx.lineTo(80, image.height + 20);
                    ctx.stroke();

                    // Y-axis labels and ticks
                    for (let i = 0; i <= 1000; i += 100) {
                        const y = 20 + i / 1000 * image.height;
                        ctx.fillText(i.toString(), 75, y + 5);
                        ctx.beginPath();
                        ctx.moveTo(75, y);
                        ctx.lineTo(80, y);
                        ctx.stroke();
                    }

                    // X-axis
                    ctx.beginPath();
                    ctx.moveTo(80, image.height + 20);
                    ctx.lineTo(image.width + 80, image.height + 20);
                    ctx.stroke();

                    // X-axis labels and ticks
                    ctx.textAlign = 'center';
                    for (let i = 0; i <= 1000; i += 100) {
                        const x = 80 + i / 1000 * image.width;
                        ctx.fillText(i.toString(), x, image.height + 40);
                        ctx.beginPath();
                        ctx.moveTo(x, image.height + 20);
                        ctx.lineTo(x, image.height + 25);
                        ctx.stroke();
                    }
                };
                image.src = event.target.result;
            };
            reader.readAsDataURL(file);
        }

        /**
         * Click handler for "Clear Image": resets the file input, wipes the canvas,
         * hides the preview and removes the per-box/per-point list.
         */
        function clearImage() {
            document.getElementById('imageInput').value = '';

            // Hold the element in a local instead of relying on the browser exposing
            // elements with an id as implicit globals.
            const canvas = document.getElementById('canvas');
            canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);

            const preview = document.getElementById('imagePreview');
            preview.src = '';
            preview.style.display = 'none';

            document.getElementById('boundingBoxImages').innerHTML = '';
        }

        /**
         * Change handler for the file input: shows the chosen image in #imagePreview.
         * Does nothing when the selection is empty.
         *
         * @param {Event} event - Change event whose target exposes `files`.
         */
        function previewImage(event) {
            const selected = event.target.files[0];
            if (!selected) {
                return;
            }

            const fileReader = new FileReader();
            fileReader.onload = ({ target }) => {
                const previewEl = document.getElementById('imagePreview');
                previewEl.src = target.result;
                previewEl.style.display = 'block';
            };
            fileReader.readAsDataURL(selected);
        }

        /**
         * Change handler for the model selector: replaces the prompt text with the
         * point-detection prompt for the robotics model, or with the bounding-box
         * prompt for every other model.
         */
        function handleModelChange() {
            const selectedModel = document.getElementById('modelSelect').value;
            const promptBox = document.getElementById('promptInput');
            const wantsPoints = selectedModel === 'gemini-robotics-er-1.5-preview';

            promptBox.value = wantsPoints
                ? 'Point to no more than 10 items in the image. The label returned should be an identifying name for the object detected. The answer should follow the json format: [{"point": [y, x], "label": <label1>}, ...]. The points are in [y, x] format normalized to 0-1000.'
                : 'Return bounding boxes as JSON arrays [ymin, xmin, ymax, xmax]\n';
        }

        /**
         * Applies the optional `?model=<id>` query parameter: selects that model and
         * refreshes the default prompt to match it.
         *
         * A model id that is not in the dropdown is added as a new option; assigning
         * an unknown value to a select would otherwise leave it with no selection and
         * an empty model id.
         */
        function initializeFromURL() {
            const urlParams = new URLSearchParams(window.location.search);
            const modelParam = urlParams.get('model');

            if (!modelParam) {
                return;
            }

            const modelSelect = document.getElementById('modelSelect');
            const isListed = Array.from(modelSelect.options).some((option) => option.value === modelParam);
            if (!isListed) {
                const option = document.createElement('option');
                option.value = modelParam;
                option.textContent = modelParam;
                modelSelect.appendChild(option);
            }

            modelSelect.value = modelParam;
            handleModelChange();
        }

        // Wire each control to its handler: [element id, event name, handler]
        const listenerBindings = [
            ['submitBtn', 'click', processImageAndPrompt],
            ['clearImageBtn', 'click', clearImage],
            ['imageInput', 'change', previewImage],
            ['modelSelect', 'change', handleModelChange],
        ];
        for (const [elementId, eventName, handler] of listenerBindings) {
            document.getElementById(elementId).addEventListener(eventName, handler);
        }

        // Apply the optional ?model= query parameter once the listeners are in place
        initializeFromURL();
    </script>
    <style>
        /* Page shell: one centred column */
        body {
            margin: 0 auto;
            padding: 20px;
            max-width: 800px;
            font-family: Arial, sans-serif;
        }

        /* Prompt editor spans the column */
        textarea {
            height: 100px;
            width: 100%;
        }

        /* Bordered panels for the model reply and the image/canvas area */
        #result,
        #imageContainer {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
        }

        /* The canvas scales down with the column, keeping its aspect ratio */
        #canvas {
            height: auto;
            max-width: 100%;
        }

        /* The preview stays hidden until the script shows it */
        #imagePreview {
            display: none;
            margin-top: 10px;
            max-width: 100%;
        }

        /* List of box crops / point entries below the canvas */
        #boundingBoxImages {
            margin-top: 20px;
        }

        #boundingBoxImages img {
            max-width: 100%;
        }

        .bounding-box-container {
            display: block;
            margin-bottom: 20px;
        }

        .bounding-box-container p {
            margin: 0 0 5px;
            font-weight: bold;
        }

        .bounding-box-container img {
            display: block;
            border: 1px solid #ccc;
        }
    </style>
</head>
<body>
    <!-- All element ids below are looked up by the module script in <head>; keep them stable. -->
    <main>
        <h1>Gemini API Image Bounding Box Visualization</h1>
        <label for="modelSelect">Model</label>
        <select id="modelSelect">
            <option value="gemini-2.5-flash">gemini-2.5-flash (default)</option>
            <option value="gemini-robotics-er-1.5-preview">gemini-robotics-er-1.5-preview</option>
            <option value="gemini-3-pro-preview">gemini-3-pro-preview</option>
            <option value="gemini-2.5-pro">gemini-2.5-pro</option>
            <option value="gemini-2.5-flash-lite-preview-06-17">gemini-2.5-flash-lite-preview-06-17</option>
            <option value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
            <option value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
            <option value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
            <option value="gemini-2.0-flash-lite">gemini-2.0-flash-lite</option>
            <option value="gemini-2.0-pro-exp-02-05">gemini-2.0-pro-exp-02-05</option>
            <option value="gemini-2.0-flash">gemini-2.0-flash</option>
            <option value="gemini-2.0-flash-thinking-exp-01-21">gemini-2.0-flash-thinking-exp-01-21</option>
            <option value="gemini-2.0-flash-thinking-exp-1219">gemini-2.0-flash-thinking-exp-1219</option>
            <option value="gemini-2.0-flash-exp">gemini-2.0-flash-exp</option>
            <option value="gemini-exp-1206">gemini-exp-1206</option>
            <option value="gemini-exp-1121">gemini-exp-1121</option>
            <option value="gemini-exp-1114">gemini-exp-1114</option>
            <option value="gemini-1.5-flash-8b-001">gemini-1.5-flash-8b-001</option>
            <option value="gemini-1.5-flash-8b-latest">gemini-1.5-flash-8b-latest</option>
            <option value="gemini-1.5-flash-002">gemini-1.5-flash-002</option>
            <option value="gemini-1.5-pro-002">gemini-1.5-pro-002</option>
            <option value="gemini-1.5-flash-001">gemini-1.5-flash-001</option>
            <option value="gemini-1.5-pro-001">gemini-1.5-pro-001</option>
            <option value="gemini-1.5-flash-latest">gemini-1.5-flash-latest</option>
            <option value="gemini-1.5-pro-latest">gemini-1.5-pro-latest</option>
            <option value="gemini-pro">gemini-pro</option>
        </select>
        <label for="imageInput">Image</label>
        <input id="imageInput" type="file" accept="image/*">
        <button id="clearImageBtn" type="button">Clear Image</button>
        <label for="promptInput">Prompt</label>
        <textarea id="promptInput">Return bounding boxes as JSON arrays [ymin, xmin, ymax, xmax]
</textarea>
        <button id="submitBtn" type="button">Process</button>
        <!-- Live region: the script writes status, the model reply and errors here. -->
        <div id="result" aria-live="polite"></div>
        <div id="imageContainer">
            <img id="imagePreview" alt="Image preview">
            <canvas id="canvas" role="img" aria-label="Selected image with detected boxes or points drawn on a 0 to 1000 grid"></canvas>
        </div>
        <div id="boundingBoxImages"></div>
    </main>
</body>
</html>
